
** Set working directory to "JOP replication files" folder

*** 1. Male HH member placebo 

* Build the household-level key file that both placebo sections merge against.
use "DATA FILES TO SHARE/IHDS_RAW/37382-0011-Data.dta", clear

* Household-by-wave identifier: state + district + PSU + household + split ID + wave,
* each component zero-padded. Per the original author's note, HHSPLITID in this
* file already carries the 2012 split ID, which is why it is used here instead of
* a variable named HHSPLITID2012.
generate str13 hhuid_using2012 = ///
    string(STATEID, "%02.0f") + string(DISTID, "%02.0f") + string(PSUID, "%02.0f") + ///
    string(HHID2012, "%03.0f") + string(HHSPLITID, "%02.0f") + string(SURVEY, "%02.0f")

* Number the distinct identifiers; the maximum of y is the count of distinct IDs.
egen y = group(hhuid_using2012)
summarize y

* Duplicate diagnostic: dup is 0 when an identifier occurs once, otherwise the
* running position of the row within its identifier.
bysort hhuid_using2012: generate dup = cond(_N > 1, _n, 0)
tabulate dup
sort hhuid_using2012

save "DATA FILES TO SHARE/TEMP_FILES/hh_uid.dta", replace

* Individual-level file: one row per person per wave.
use "DATA FILES TO SHARE/IHDS_RAW/37382-0005-Data.dta", clear

* Restrict to individuals who are male (RO3 == 1) and household head (RO4 == 1).
generate byte head_male_tag = (RO3 == 1 & RO4 == 1)
keep if head_male_tag == 1

* Household identifier without the survey wave.
generate str11 hhuid_using2012_noyr = ///
    string(STATEID, "%02.0f") + string(DISTID, "%02.0f") + string(PSUID, "%02.0f") + ///
    string(HHID2012, "%03.0f") + string(HHSPLITID2012, "%02.0f")

* dup2 is 0 when the household contributes a single male-head row, otherwise
* the running position of the row within the household.
bysort hhuid_using2012_noyr: generate dup2 = cond(_N > 1, _n, 0)
tabulate dup2
sort hhuid_using2012_noyr

* Households that appear only once are dropped.
keep if dup2 > 0

* Household-by-wave identifier (same components plus the survey wave).
generate str13 hhuid_using2012 = ///
    string(STATEID, "%02.0f") + string(DISTID, "%02.0f") + string(PSUID, "%02.0f") + ///
    string(HHID2012, "%03.0f") + string(HHSPLITID2012, "%02.0f") + string(SURVEY, "%02.0f")

* Duplicate check on the household-by-wave key; the author's run found none.
bysort hhuid_using2012: generate dup = cond(_N > 1, _n, 0)
tabulate dup
sort hhuid_using2012

* Keep only the keys and the roster variables needed later.
keep hhuid_using2012 hhuid_using2012_noyr RO3 RO4 RO5 RO6 RO8

* Attach the household questionnaire variables.
merge 1:1 hhuid_using2012 using "DATA FILES TO SHARE/TEMP_FILES/hh_uid.dta"

* Author's recorded result: 53,940 rows from the individual file matched, i.e.
* male-headed households that answered the household questionnaire.

* Analysis sample: matched rows only.
drop if _merge != 3

* The non-resident type variable is empty, so the number of non-residents in the
* household (NNR) is used to identify households with a non-resident member.
tabulate NNR
 
 /* So 6,221 household-wave rows report exactly 1 non-resident (migrant), and 9,021 report at least 1.
 
   HQ5 3.0 # |
         hh |
nonresident |
          s |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |     44,919       83.28       83.28
          1 |      6,221       11.53       94.81
          2 |      2,119        3.93       98.74
          3 |        482        0.89       99.63
          4 |        147        0.27       99.90
          5 |         36        0.07       99.97
          6 |          9        0.02       99.99
          7 |          3        0.01       99.99
          8 |          2        0.00      100.00
          9 |          2        0.00      100.00
------------+-----------------------------------
      Total |     53,940      100.00

 */
  * Number of non-residents in the household, split by survey wave.
  tabulate NNR SURVEY
  
  /*
  
 HQ5 3.0 # |
        hh | IHDS1 (2005) or IHDS2
nonresiden |        (2012)
        ts |   IHDS1 1    IHDS2 2 |     Total
-----------+----------------------+----------
         0 |    24,185     20,734 |    44,919 
         1 |     1,984      4,237 |     6,221 
         2 |       632      1,487 |     2,119 
         3 |       117        365 |       482 
         4 |        43        104 |       147 
         5 |         7         29 |        36 
         6 |         2          7 |         9 
         7 |         0          3 |         3 
         8 |         0          2 |         2 
         9 |         0          2 |         2 
-----------+----------------------+----------
     Total |    26,970     26,970 |    53,940 


  */
 
 ** INCREMIT (remittance income) would imply a member who migrated for economic
 ** reasons, so we compare an NNR-based indicator with an INCREMIT-based one.

 ** Indicator: the household reports at least one non-resident in this wave.
 ** Stata treats numeric missing as larger than any number, so a bare NNR > 0
 ** would also flag rows where NNR is missing; those rows are left at 0 here.
gen non_res_NNR = 0
replace non_res_NNR = 1 if NNR > 0 & !missing(NNR)
tab non_res_NNR SURVEY

/*

           | IHDS1 (2005) or IHDS2
non_res_NN |        (2012)
         R |   IHDS1 1    IHDS2 2 |     Total
-----------+----------------------+----------
         0 |    24,185     20,734 |    44,919 
         1 |     2,785      6,236 |     9,021 
-----------+----------------------+----------
     Total |    26,970     26,970 |    53,940 

*/


 
** Indicator: the household reports positive remittance income (INCREMIT).
** Numeric missing compares as larger than any number in Stata, so the missing
** check keeps rows with unknown INCREMIT from being coded as remittance receivers.
count if missing(INCREMIT)
gen non_res = 0
replace non_res = 1 if INCREMIT > 0 & !missing(INCREMIT)
tab non_res SURVEY

/*

           | IHDS1 (2005) or IHDS2
           |        (2012)
   non_res |   IHDS1 1    IHDS2 2 |     Total
-----------+----------------------+----------
         0 |    25,700     23,836 |    49,536 
         1 |     1,270      3,134 |     4,404 
-----------+----------------------+----------
     Total |    26,970     26,970 |    53,940 

*/

 ** The INCREMIT-based count of non-resident households (4,404 in the author's run)
 ** is smaller than the NNR-based count (9,021). A non-resident member creates male
 ** absence whether or not money is sent home, so NNR is used for the analysis.
 ** First confirm that the INCREMIT households are a subset of the NNR households.

gen non_res_check = 0
replace non_res_check = 1 if non_res_NNR == 1 & non_res == 1

 ** Rows flagged by both indicators (the author's note records 4,404).
tab non_res_check

 ** Rows flagged by INCREMIT but not by NNR; a count of zero means the
 ** INCREMIT households are fully contained in the NNR households.
count if non_res == 1 & non_res_NNR == 0


** Migration flags and sample indicator for the difference-in-differences design.
**
** non_res_w1 / non_res_w2 are HOUSEHOLD-level: they take the same value on both
** wave rows of a household and record whether the household reported a
** non-resident in wave 1 / wave 2. Coding them row by row (1 only on the row of
** the matching wave) makes the two flags mutually exclusive within a row, so
** did_sample would drop only the wave-1 row of a household that already had a
** non-resident in wave 1 and keep its wave-2 row.

bysort hhuid_using2012_noyr (SURVEY): egen non_res_w1 = max(non_res_NNR == 1 & SURVEY == 1)
bysort hhuid_using2012_noyr (SURVEY): egen non_res_w2 = max(non_res_NNR == 1 & SURVEY == 2)
tab non_res_w1 non_res_w2

** Post-period indicator: 1 for the 2012 wave (SURVEY == 2), 0 otherwise.
gen year = 0
replace year = 1 if SURVEY == 2
tab year

** DiD sample: households with no non-resident in wave 1, i.e. never-migrant
** households (w1 == 0, w2 == 0) and households that gain one (w1 == 0, w2 == 1).
gen did_sample = 0
replace did_sample = 1 if non_res_w1 == 0 & non_res_w2 == 0
replace did_sample = 1 if non_res_w1 == 0 & non_res_w2 == 1
tab did_sample

/*
 NOTE: the table below was recorded with the earlier row-level coding of
 non_res_w1 / non_res_w2. Re-run the tabulation above to refresh the counts.

 did_sample |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |      2,785        5.16        5.16
          1 |     51,155       94.84      100.00
------------+-----------------------------------
      Total |     53,940      100.00

*/

** Treatment x post: 1 only on wave-2 rows of households with a non-resident in wave 2.
gen did = year * non_res_w2

** Village identifier: zero-padded state + district + PSU codes.
generate str6 vill_id = ///
    string(STATEID, "%02.0f") + string(DISTID, "%02.0f") + string(PSUID, "%02.0f")
egen z = group(vill_id)
summarize z

** Numeric versions of the village and household identifiers.
destring vill_id, generate(vill_id_num)
destring hhuid_using2012_noyr, generate(hhuid)


** Full working file.
save "DATA FILES TO SHARE/TEMP_FILES/male_placebo.dta", replace

** Slim analysis file with only the variables listed here.
keep non_res vill_id_num ME13 INCREMIT GROUPS6 RO5 INCOME URBAN INCOME5 FM1 NPERSONS ///
     hhuid did_sample year did

save "DATA FILES TO SHARE/male_placebo_df.dta", replace




***2. Female placebo 

* Individual-level file: one row per person per wave.
use "DATA FILES TO SHARE/IHDS_RAW/37382-0005-Data.dta", clear

* Restrict to individuals who are female (RO3 == 2) and household head (RO4 == 1).
generate byte head_female_tag = (RO3 == 2 & RO4 == 1)
keep if head_female_tag == 1

* Household identifier without the survey wave.
generate str11 hhuid_using2012_noyr = ///
    string(STATEID, "%02.0f") + string(DISTID, "%02.0f") + string(PSUID, "%02.0f") + ///
    string(HHID2012, "%03.0f") + string(HHSPLITID2012, "%02.0f")

* dup2 is 0 when the household contributes a single female-head row, otherwise
* the running position of the row within the household.
bysort hhuid_using2012_noyr: generate dup2 = cond(_N > 1, _n, 0)
tabulate dup2
sort hhuid_using2012_noyr

* Households that appear only once are dropped.
keep if dup2 > 0

* Household-by-wave identifier (same components plus the survey wave).
generate str13 hhuid_using2012 = ///
    string(STATEID, "%02.0f") + string(DISTID, "%02.0f") + string(PSUID, "%02.0f") + ///
    string(HHID2012, "%03.0f") + string(HHSPLITID2012, "%02.0f") + string(SURVEY, "%02.0f")

* Duplicate check on the household-by-wave key; the author's run found none.
bysort hhuid_using2012: generate dup = cond(_N > 1, _n, 0)
tabulate dup
sort hhuid_using2012

* Keep only the keys and the roster variables needed later.
keep hhuid_using2012 hhuid_using2012_noyr RO3 RO4 RO5 RO6 RO8

* Attach the household questionnaire variables.
merge 1:1 hhuid_using2012 using "DATA FILES TO SHARE/TEMP_FILES/hh_uid.dta"

* Analysis sample: matched rows only.
drop if _merge != 3

* NNR is the number of non-residents in the household.
tabulate NNR

 /* HQ5 3.0 # | Note: 3,407 rows report zero non-residents; the remaining 1,467 report at least one.
         hh |
nonresident |
          s |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |      3,407       69.90       69.90
          1 |      1,030       21.13       91.03
          2 |        311        6.38       97.41
          3 |         87        1.78       99.20
          4 |         30        0.62       99.82
          5 |          8        0.16       99.98
          6 |          1        0.02      100.00
------------+-----------------------------------
      Total |      4,874      100.00

 */

 
 ** An INCREMIT-based indicator (households receiving remittances) is built
 ** further down as a cross-check, in case some non-residents were not recorded in NNR.
 ** NOTE(review): an earlier note here said INCREMIT is the variable used, but the
 ** wave flags later in this section are built from non_res_NNR - confirm the intent.
 ** Sanity check: build the NNR-based indicator first.

 ** Stata treats numeric missing as larger than any number, so a bare NNR > 0
 ** would also flag rows where NNR is missing; those rows are left at 0 here.
 gen non_res_NNR = 0
 replace non_res_NNR = 1 if NNR > 0 & !missing(NNR)
 

 /*
 
 
   HQ5 3.0 # |
         hh |
nonresident |
          s |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |      3,407       69.90       69.90
          1 |      1,030       21.13       91.03
          2 |        311        6.38       97.41
          3 |         87        1.78       99.20
          4 |         30        0.62       99.82
          5 |          8        0.16       99.98
          6 |          1        0.02      100.00
------------+-----------------------------------

 */
 
** Indicator: the household reports positive remittance income (INCREMIT).
** Numeric missing compares as larger than any number in Stata, so the missing
** check keeps rows with unknown INCREMIT from being coded as remittance receivers.
count if missing(INCREMIT)
gen non_res = 0
replace non_res = 1 if INCREMIT > 0 & !missing(INCREMIT)
tab non_res SURVEY

/*
           | IHDS1 (2005) or IHDS2
           |        (2012)
   non_res |   IHDS1 1    IHDS2 2 |     Total
-----------+----------------------+----------
         0 |     1,949      1,791 |     3,740 
         1 |       488        646 |     1,134 ## non-res 
-----------+----------------------+----------
     Total |     2,437      2,437 |     4,874

*/

** Overlap between the NNR-based and INCREMIT-based indicators.
gen non_res_check = 0
replace non_res_check = 1 if non_res_NNR == 1 & non_res == 1

** Rows flagged by both indicators. The author's note records 1,134 such rows out
** of the 1,467 flagged by NNR; perhaps only these households received remittances.
tab non_res_check

** Rows flagged by INCREMIT but not by NNR; zero means the INCREMIT households
** are fully contained in the NNR households.
count if non_res == 1 & non_res_NNR == 0

 ** Migration flags and sample indicator for the difference-in-differences design.
 **
 ** non_res_w1 / non_res_w2 are HOUSEHOLD-level: they take the same value on both
 ** wave rows of a household and record whether the household reported a
 ** non-resident in wave 1 / wave 2. Coding them row by row (1 only on the row of
 ** the matching wave) makes the two flags mutually exclusive within a row, so
 ** did_sample would drop only the wave-1 row of a household that already had a
 ** non-resident in wave 1 and keep its wave-2 row.

bysort hhuid_using2012_noyr (SURVEY): egen non_res_w1 = max(non_res_NNR == 1 & SURVEY == 1)
bysort hhuid_using2012_noyr (SURVEY): egen non_res_w2 = max(non_res_NNR == 1 & SURVEY == 2)
tab non_res_w1 non_res_w2


/*
 NOTE: the table below was recorded with the earlier row-level coding, which is
 why the (1,1) cell is empty. Re-run the tabulation above to refresh the counts.

           |      non_res_w2
non_res_w1 |         0          1 |     Total
-----------+----------------------+----------
         0 |     3,407        808 |     4,215 
         1 |       659          0 |       659 
-----------+----------------------+----------
     Total |     4,066        808 |     4,874 

*/


** DiD sample: households with no non-resident in wave 1, i.e. never-migrant
** households (w1 == 0, w2 == 0) and households that gain one (w1 == 0, w2 == 1).

gen did_sample = 0
replace did_sample = 1 if non_res_w1 == 0 & non_res_w2 == 0
replace did_sample = 1 if non_res_w1 == 0 & non_res_w2 == 1
tab did_sample

/*
 NOTE: recorded with the earlier row-level coding; re-run to refresh.

 did_sample |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |        659       13.52       13.52
          1 |      4,215       86.48      100.00
------------+-----------------------------------
      Total |      4,874      100.00

*/

** Post-period indicator: 1 for the 2012 wave (SURVEY == 2), 0 otherwise.
gen year = 0
replace year = 1 if SURVEY == 2

tab year
** Village identifier: zero-padded state + district + PSU codes.

generate str6 vill_id = ///
    string(STATEID, "%02.0f") + string(DISTID, "%02.0f") + string(PSUID, "%02.0f")
egen z = group(vill_id)
summarize z

** Numeric versions of the village and household identifiers (for regression).
destring vill_id, generate(vill_id_num)
destring hhuid_using2012_noyr, generate(hhuid)



** Age restriction: drop female heads younger than 34 so that they are not also
** covered by the eligible women's module.
** NOTE(review): heads aged exactly 34, and rows with missing RO5, are retained
** (a missing value compares as larger than any number) - confirm this is intended.
bysort year: count if RO5 < 34

keep if RO5 >= 34

** Re-check the panel: after the age restriction a household may be left with a
** single row. dup2 is 0 for such households, otherwise the row position.
bysort hhuid_using2012_noyr: generate dup2 = cond(_N > 1, _n, 0)

tabulate dup2
sort hhuid_using2012_noyr

** Keep only households that still have more than one row.
drop if dup2 == 0

** Resulting DiD sample after the age restriction.
tabulate did_sample
 
 /*
  did_sample |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |        570       12.73       12.73
          1 |      3,908       87.27      100.00
------------+-----------------------------------
      Total |      4,478      100.00

 */
 
  ** Treatment x post interaction: wave-2 indicator times the wave-2 non-resident flag.
  gen did = year * non_res_w2

** Full working file.
save "DATA FILES TO SHARE/TEMP_FILES/female_placebo.dta", replace

** Slim analysis file. The numeric household ID (hhuid) is kept alongside the
** string ID so this file has the same numeric identifier as male_placebo_df.dta;
** hhuid was created for regression but was dropped from the keep list before.
keep ME13 non_res vill_id_num GROUPS6 RO5 RO6 INCOME URBAN INCOME5 FM1 NPERSONS ///
     year did did_sample hhuid_using2012_noyr hhuid

save "DATA FILES TO SHARE/female_placebo_df.dta", replace




 